1 Data

Data preparation is done in file 00.Rmd.

# Load the dog ownership cost data and drop the
# `cost_compared_to_other_breeds` column.
dog_ownership_cost <- select(
  read_rds("data/dog_ownership_cost.Rds"),
  -cost_compared_to_other_breeds
)

length(unique(dog_ownership_cost$SSC_NAME16))
[1] 187
length(unique(dog_ownership_cost$dog_breed))
[1] 182
# Load the suburb-level table `SSC` (identified by SSC_NAME16). It carries sf
# geometry: st_drop_geometry() is applied to it before summarising later on.
SSC <- read_rds("data/geo/SSC.Rds")

length(unique(SSC$SSC_NAME16))
[1] 184
# Load the two wide cost tables; wide_cost_p is the input of the PCA section.
# Presumably `_n` holds counts and `_p` proportions -- TODO confirm in 00.Rmd.
wide_cost_n <- read_rds("data/wide_cost_n.Rds")
wide_cost_p <- read_rds("data/wide_cost_p.Rds")

2 All Brisbane dogs combined

2.1 Dogs per capita

Counting all dogs, and expensive dogs only, per area (SSC).

# Count dogs per area: all dogs (`dogs_total`) and expensive dogs only
# (`dogs_exp`). `expensive` is summed as-is, so it is assumed to be a
# logical or 0/1 flag without missing values -- TODO confirm.
dog_ownership_agg <- dog_ownership_cost %>%
  group_by(SSC_NAME16) %>%
  summarise(dogs_total = n(),
            dogs_exp = sum(expensive))

# Attach the counts to the area table. The join key is spelled out so the
# join cannot silently pick up other shared columns and no "Joining, by"
# message is printed. Areas without a match keep NA counts.
SSC <- SSC %>%
  left_join(dog_ownership_agg, by = "SSC_NAME16")

2.1.1 Missing SEIFA

Areas with no URP/SEIFA values but with a small number of dogs:

          SSC_NAME16 dogs_total
1         Eagle Farm          1
2 Enoggera Reservoir          3
3          Karawatha          6
4             Lytton          1

Because these areas have no SEIFA values, they are excluded.

2.1.2 Missing dogs

Area with low URP and no dogs at all:

     SSC_NAME16 dogs_total URP
1 England Creek         NA  33

The missing value (NA) is assumed to mean zero dogs here.

2.1.3 Ranking

2.1.4 Map

2.2 Proportion of expensive dogs

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.0000  0.1178  0.1366  0.1403  0.1650  0.2636       1 

2.2.1 Ranking

2.2.2 Map

3 Association with SEIFA

3.1 Functions

# Summarise the proportion of expensive dogs (`dogs_exp_prop`) within each
# level of one SEIFA index column of the global `SSC` table.
#
# seifa_index: unquoted name of a column of `SSC` (e.g. IRSD_d).
# Returns an ungrouped table with one row per level and the columns
# mean, sd, p25, p50 and p75; missing proportions are ignored (na.rm = TRUE).
seifa_means <- function (seifa_index) {

  # Geometry is dropped first so that the summary is computed on a plain table.
  props_by_level <- group_by(st_drop_geometry(SSC), {{ seifa_index }})

  ungroup(
    summarize(
      props_by_level,
      mean = mean(dogs_exp_prop, na.rm = TRUE),
      sd = sd(dogs_exp_prop, na.rm = TRUE),
      p25 = quantile(dogs_exp_prop, 0.25, na.rm = TRUE),
      p50 = quantile(dogs_exp_prop, 0.50, na.rm = TRUE),
      p75 = quantile(dogs_exp_prop, 0.75, na.rm = TRUE)
    )
  )
}

# Kendall rank correlation between one SEIFA index column of the global `SSC`
# table and the proportion of expensive dogs (`dogs_exp_prop`).
#
# seifa_index: unquoted name of a column of `SSC` (e.g. IRSD_d).
# Returns the result of correlation(method = "kendall") for the two columns.
seifa_cor <- function (seifa_index) {

  SSC %>%
    st_drop_geometry() %>%
    select({{ seifa_index }}, dogs_exp_prop) %>%
    # Factor columns are converted to their integer level codes (not their
    # labels), so the factor levels must already be in the intended order.
    # across() replaces the superseded mutate_if().
    mutate(across(where(is.factor), as.numeric)) %>%
    correlation(method = "kendall")

}

# Violin plot of the proportion of expensive dogs (`dogs_exp_prop`) by level
# of one SEIFA index column of the global `SSC` table, overlaid with jittered
# observations and the per-level means (with intervals) estimated from a
# linear model.
#
# seifa_index: unquoted name of a column of `SSC` (e.g. IRSD_d).
# Returns a ggplot object.
seifa_plot <- function (seifa_index) {

  index_quo <- enquo(seifa_index)
  index_expr <- substitute(seifa_index)

  # Build the lm() call with the column name spliced into the formula, so the
  # stored model call names the actual column rather than `seifa_index`.
  model_call <- bquote(
    lm(dogs_exp_prop ~ .(index_expr), data = SSC, na.action = na.omit)
  )
  model <- eval(model_call)
  level_means <- estimate_means(model)

  ggplot(SSC, aes(x = !!index_quo, y = dogs_exp_prop, fill = !!index_quo)) +
    geom_violin(alpha = 0.66) +
    geom_jitter2(width = 0.05, alpha = 0.5) +
    # group = 1 joins the per-level means with a single line.
    geom_line(data = level_means, aes(y = Mean, group = 1), size = 1) +
    geom_pointrange(
      data = level_means,
      aes(y = Mean, ymin = CI_low, ymax = CI_high),
      size = 1,
      color = "white"
    ) +
    scale_fill_brewer(palette = "BrBG") +
    ylab("Proportion of expensive dogs") +
    theme_modern()

}

3.2 IRSD

3.2.1 Recalculated

seifa_means(IRSD_d)
# A tibble: 10 x 6
   IRSD_d  mean     sd    p25   p50   p75
   <fct>  <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1      0.161 0.0443 0.146  0.159 0.184
 2 2      0.130 0.0833 0.0913 0.155 0.170
 3 3      0.147 0.0258 0.129  0.140 0.160
 4 4      0.147 0.0411 0.121  0.144 0.173
 5 5      0.145 0.0305 0.130  0.137 0.155
 6 6      0.142 0.0350 0.115  0.136 0.156
 7 7      0.127 0.0213 0.113  0.128 0.136
 8 8      0.148 0.0268 0.125  0.148 0.163
 9 9      0.126 0.0351 0.112  0.122 0.141
10 10     0.127 0.0336 0.104  0.114 0.152
seifa_cor(IRSD_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSD_d     | dogs_exp_prop | -0.19 | [-0.28, -0.10] | -3.69 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d)

3.2.2 Original

seifa_means(IRSD_d_orig)
# A tibble: 10 x 6
   IRSD_d_orig  mean     sd    p25   p50   p75
   <fct>       <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1           0.185 0.0179 0.179  0.193 0.195
 2 2           0.164 0.0605 0.138  0.161 0.179
 3 3           0.142 0.0540 0.125  0.159 0.180
 4 4           0.161 0.0163 0.151  0.158 0.159
 5 5           0.150 0.0654 0.122  0.160 0.178
 6 6           0.122 0.0813 0.0962 0.136 0.166
 7 7           0.143 0.0346 0.124  0.142 0.166
 8 8           0.150 0.0312 0.130  0.140 0.157
 9 9           0.136 0.0321 0.112  0.131 0.149
10 10          0.134 0.0316 0.113  0.127 0.156
seifa_cor(IRSD_d_orig)
# Correlation Matrix (kendall-method)

Parameter1  |    Parameter2 |   tau |         95% CI |     z |         p
------------------------------------------------------------------------
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.08] | -3.31 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d_orig)

3.3 IRSAD

3.3.1 Recalculated

seifa_means(IRSAD_d)
# A tibble: 10 x 6
   IRSAD_d  mean     sd   p25   p50   p75
   <fct>   <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1       0.156 0.0641 0.138 0.162 0.186
 2 2       0.140 0.0689 0.130 0.156 0.173
 3 3       0.143 0.0232 0.127 0.144 0.157
 4 4       0.157 0.0408 0.135 0.158 0.185
 5 5       0.139 0.0311 0.128 0.135 0.146
 6 6       0.137 0.0262 0.115 0.129 0.154
 7 7       0.136 0.0354 0.115 0.129 0.146
 8 8       0.141 0.0260 0.123 0.132 0.155
 9 9       0.132 0.0370 0.114 0.124 0.154
10 10      0.121 0.0315 0.101 0.112 0.127
seifa_cor(IRSAD_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSAD_d    | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d)

3.3.2 Original

seifa_means(IRSAD_d_orig)
# A tibble: 10 x 6
   IRSAD_d_orig  mean     sd    p25   p50   p75
   <fct>        <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1            0.185 0.0179 0.179  0.193 0.195
 2 2            0.145 0.0955 0.0751 0.131 0.201
 3 3            0.161 0.0248 0.143  0.161 0.179
 4 4            0.138 0.0183 0.131  0.138 0.144
 5 5            0.153 0.0777 0.151  0.158 0.176
 6 6            0.126 0.0875 0.0641 0.159 0.177
 7 7            0.157 0.0258 0.136  0.156 0.173
 8 8            0.147 0.0297 0.127  0.144 0.155
 9 9            0.144 0.0352 0.129  0.143 0.162
10 10           0.133 0.0319 0.112  0.127 0.149
seifa_cor(IRSAD_d_orig)
# Correlation Matrix (kendall-method)

Parameter1   |    Parameter2 |   tau |         95% CI |     z |         p
-------------------------------------------------------------------------
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.13] | -3.98 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d_orig)

3.4 IER

3.4.1 Recalculated

seifa_means(IER_d)
# A tibble: 10 x 6
   IER_d  mean     sd   p25   p50   p75
   <fct> <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1     0.162 0.0452 0.136 0.165 0.187
 2 2     0.137 0.0331 0.126 0.134 0.154
 3 3     0.138 0.0210 0.118 0.137 0.152
 4 4     0.144 0.0410 0.115 0.129 0.152
 5 5     0.125 0.0702 0.110 0.138 0.165
 6 6     0.152 0.0288 0.129 0.156 0.172
 7 7     0.132 0.0424 0.113 0.139 0.150
 8 8     0.132 0.0256 0.115 0.129 0.153
 9 9     0.137 0.0425 0.114 0.122 0.146
10 10    0.144 0.0439 0.114 0.153 0.171
seifa_cor(IER_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |        95% CI |     z |     p
------------------------------------------------------------------
IER_d      | dogs_exp_prop | -0.08 | [-0.18, 0.02] | -1.56 | 0.118

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d)

3.4.2 Original

seifa_means(IER_d_orig)
# A tibble: 10 x 6
   IER_d_orig  mean     sd   p25   p50   p75
   <fct>      <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1          0.161 0.0440 0.140 0.162 0.183
 2 2          0.137 0.0316 0.124 0.132 0.154
 3 3          0.145 0.0387 0.117 0.137 0.163
 4 4          0.134 0.0263 0.114 0.128 0.144
 5 5          0.130 0.0693 0.114 0.143 0.173
 6 6          0.145 0.0297 0.127 0.138 0.161
 7 7          0.144 0.0340 0.128 0.141 0.154
 8 8          0.129 0.0465 0.127 0.130 0.152
 9 9          0.139 0.0369 0.115 0.132 0.164
10 10         0.138 0.0412 0.111 0.127 0.164
seifa_cor(IER_d_orig)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |        95% CI |     z |     p
------------------------------------------------------------------
IER_d_orig | dogs_exp_prop | -0.08 | [-0.18, 0.01] | -1.57 | 0.116

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d_orig)

3.5 IEO

3.5.1 Recalculated

seifa_means(IEO_d)
# A tibble: 10 x 6
   IEO_d  mean     sd   p25   p50   p75
   <fct> <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1     0.156 0.0646 0.133 0.165 0.188
 2 2     0.166 0.0265 0.152 0.159 0.178
 3 3     0.132 0.0683 0.123 0.144 0.162
 4 4     0.146 0.0292 0.123 0.145 0.165
 5 5     0.146 0.0385 0.130 0.147 0.162
 6 6     0.153 0.0306 0.131 0.152 0.171
 7 7     0.133 0.0276 0.117 0.129 0.136
 8 8     0.124 0.0304 0.115 0.125 0.132
 9 9     0.127 0.0209 0.113 0.127 0.132
10 10    0.119 0.0302 0.106 0.112 0.119
seifa_cor(IEO_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d      | dogs_exp_prop | -0.30 | [-0.39, -0.21] | -5.84 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d)

3.5.2 Original

seifa_means(IEO_d_orig)
# A tibble: 10 x 6
   IEO_d_orig   mean     sd    p25    p50    p75
   <fct>       <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1          0.153  0.0665 0.138  0.179  0.194 
 2 2          0.175  0.0911 0.131  0.180  0.222 
 3 3          0.0625 0.0884 0.0312 0.0625 0.0938
 4 4          0.172  0.0393 0.145  0.165  0.188 
 5 5          0.168  0.0221 0.151  0.171  0.175 
 6 6          0.166  0.0260 0.161  0.176  0.182 
 7 7          0.136  0.0733 0.127  0.154  0.159 
 8 8          0.150  0.0320 0.132  0.148  0.162 
 9 9          0.147  0.0331 0.130  0.147  0.165 
10 10         0.130  0.0300 0.112  0.127  0.144 
seifa_cor(IEO_d_orig)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.35, -0.17] | -4.82 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d_orig)

4 PCA

# PCA input: the columns from `akita` to the last column of wide_cost_p,
# with the sf geometry dropped and converted to a tibble.
# The commented-out line below switches the input to wide_cost_n.
data <- 
  # wide_cost_n %>%
  wide_cost_p %>%
  st_drop_geometry() %>% 
  select(akita:last_col()) %>% 
  as_tibble()

# View(cov(data))
# Principal components on the unstandardised columns (standardize = FALSE),
# with the number of components chosen automatically (n = "auto").
# NOTE(review): without standardisation, high-variance columns can dominate
# the first component -- confirm this is intended.
pca <- principal_components(data, 
                            standardize = FALSE,
                            n = "auto")
pca
# Loadings from Principal Component Analysis (no rotation)

Variable              |    PC1    | Complexity
----------------------------------------------
akita                 |   -0.04   |    1.00   
british_bulldog       |   -0.37   |    1.00   
dogue_de_bordeaux     |   -0.10   |    1.00   
french_bulldog        |   -0.52   |    1.00   
german_shepherd       |   -2.12   |    1.00   
irish_wolfhound       |   -0.21   |    1.00   
maltese               |   -6.12   |    1.00   
rottweiler            |   -0.86   |    1.00   
samoyed               |   -0.11   |    1.00   
yorkshire_terrier     |   -0.10   |    1.00   
chinese_crested_dog   |   -0.04   |    1.00   
chow_chow             |   -0.05   |    1.00   
lowchen               |   -0.04   |    1.00   
saluki                |   -0.01   |    1.00   
pharaoh_hound         | -6.03e-03 |    1.00   
st_bernard            |   -0.02   |    1.00   
tibetan_mastiff       |   -0.01   |    1.00   
canadian_eskimo_dog   | -2.73e-03 |    1.00   
black_russian_terrier | -3.10e-03 |    1.00   

The unique principal component accounted for 96.95% of the total variance of the original data.
summary(pca)
# (Explained) Variance of Components

Parameter                       |    PC1
----------------------------------------
Eigenvalues                     | 43.157
Variance Explained              |  0.969
Variance Explained (Cumulative) |  0.969
Variance Explained (Proportion) |  0.969
plot(pca)

# Columns SSC_CODE16 to caution of wide_cost_p (geometry dropped) plus the
# PCA scores: `pca_raw` is the first component score and `pca` its decile
# group (1-10) from ntile().
# NOTE(review): every PC1 loading printed above is negative, so the direction
# of `pca_raw` should be checked before comparing `pca` with SEIFA deciles.
pca_results <- wide_cost_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  select(SSC_CODE16:caution) %>%
  mutate(pca_raw = predict(pca)$Component_1) %>%
  mutate(pca = ntile(pca_raw, 10))

4.1 IRSD_d

# Scatter plot of the first-component score against the `IRSD` column.
pca_results %>%
  ggplot(aes(x = IRSD, y = pca_raw)) +
  geom_point()

pca == IRSD_d <lgl> 
# total N=180 valid N=179 mean=0.12 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 158 | 87.78 |   88.27 |  88.27
TRUE  |  21 | 11.67 |   11.73 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

4.2 IRSAD_d

# Scatter plot of the first-component score against the `IRSAD` column.
pca_results %>%
  ggplot(aes(x = IRSAD, y = pca_raw)) +
  geom_point()

pca == IRSAD_d <lgl> 
# total N=180 valid N=179 mean=0.12 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 158 | 87.78 |   88.27 |  88.27
TRUE  |  21 | 11.67 |   11.73 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

4.3 IER_d

# Scatter plot of the first-component score against the `IER` column.
pca_results %>%
  ggplot(aes(x = IER, y = pca_raw)) +
  geom_point()

pca == IER_d <lgl> 
# total N=180 valid N=179 mean=0.08 sd=0.28

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 164 | 91.11 |   91.62 |  91.62
TRUE  |  15 |  8.33 |    8.38 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

4.4 IEO_d

# Scatter plot of the first-component score against the `IEO` column.
pca_results %>%
  ggplot(aes(x = IEO, y = pca_raw)) +
  geom_point()

pca == IEO_d <lgl> 
# total N=180 valid N=179 mean=0.11 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 159 | 88.33 |   88.83 |  88.83
TRUE  |  20 | 11.11 |   11.17 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

5 Computing Environment

 R version 4.1.2 (2021-11-01)
 Platform: x86_64-w64-mingw32/x64 (64-bit)
 Running under: Windows 10 x64 (build 18363)
 
 Matrix products: default
 
 attached base packages:
 [1] stats     graphics  grDevices utils     datasets  methods   base     
 
 other attached packages:
  [1] parameters_0.16.0 modelbased_0.9.0  see_0.6.8         correlation_0.7.1
  [5] tmap_3.3-2        sf_1.0-5          DT_0.20           sjPlot_2.8.10    
  [9] sjmisc_2.8.9      scales_1.1.1      magrittr_2.0.1    forcats_0.5.1    
 [13] stringr_1.4.0     dplyr_1.0.7       purrr_0.3.4       readr_2.1.1      
 [17] tidyr_1.1.4       tibble_3.1.6      ggplot2_3.3.5     tidyverse_1.3.1  
 [21] pacman_0.5.1     
 
To cite R in publications use:

R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.

To cite the ggplot2 package in publications use:

Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.